package com.eric;

public class Config {

	public static int ARTICLE_WORDS_COUNT = 250;

	public static int MIN_LARGE_P_WORDS = 20;// 页面最大文章块的最少字数

	public static int Min_PARAGRAPH_WORDS = 25;// 文章段最少字数

	public static int LISTING_WORDS_LIMIT = 25;

	public static int AS_KW = 5;

	// 分隔符，用于额外拆分TDK处的关键词，并未为分词器使用
	public static String REGEX_EN_SEPARATOR = "\\s|\\p{P}";

	public static String REGEX_CN_SEPARATOR = "[\u2026\u002e\u4e36\uff5e\uff01\u0040\u0023\uffe5\u0025\u2026\u0023\u003b\uff08\uff09\u2014\u002b\u0060\u003d\u007b\u007d\u3010\u3011\u3001\u007c\uff1b\uff1a\u2018\u201c\uff0c\u300a\u3002\u300b\u002f\uff1f\u0026\u0023\u003b\u002d]";

	public static String REGEX_SEPARATOR = REGEX_EN_SEPARATOR + "|" + REGEX_CN_SEPARATOR;

	public static String REGEX_SIMPLE_SEPARATOR = "[\uff0c\uff01\u3002\uff1f]";

	public static String REGEX_OTHER_MARK = "[\u2764\u007e]";

	public static String REGEX_CN_CHAR = "[\u4e00-\u9fa5]";

	public static String REGEX_EN_CHAR = "[a-zA-Z]";

	public static String REGEX_LETTER_NUM = "[A-Za-z0-9]";

	public static String DEFAULT_CHARSET = "utf-8";

	public static enum INPUT_TYPE {
		STRING, FILE, URL, STR_URL, STR_FILE, DOC
	}

	public static enum DOC_PART {
		ANCHOR, ARTICLE, DOC
	}

	public static String[] COMMON_ACTIONS = new String[] { "下载", "分享", "保存", "播放", "上传", "回复" };

	public static String[] NONESENSE = new String[] { "顶", "看帖必回", "路过", "沙发", "板凳" };

	public static String DOCTYPE = "<!DOCTYPE html[^>]*>";

	public static int DESCRIPTION_LENGTH = 50;

	public static int TITLE_LENGTH = 25;

	public static int OUT_LINKS_LIMIT = 20;

	public static String STOP_WORDS = "http://www.liuxingjia.cc/constants/stopwords.txt";

	public static String LOG4J_PROP = "http://www.liuxingjia.cc/constants/log4j.properties";

	public static String GENERAL_CUSTOM_KW = "http://www.liuxingjia.cc/constants/general_custom_kw.txt";

	public static String KW_PAIR_SEPARATOR = ":";

	public static int KW_PAIR_SUPPORT = 2;

}
